Daily Assignment 22

Ecosystem Science and Sustainability 330

Author

Samantha Nauman

# Previous data collecting from assignment 21
library(dataRetrieval) 
library(dplyr)         

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tsibble)          
Registered S3 method overwritten by 'tsibble':
  method               from 
  as_tibble.grouped_df dplyr

Attaching package: 'tsibble'
The following objects are masked from 'package:base':

    intersect, setdiff, union
library(timetk)         
library(modeltime)       
library(prophet)      
Loading required package: Rcpp
Loading required package: rlang
library(fable)        
Loading required package: fabletools
library(fable.prophet)    

Attaching package: 'fable.prophet'
The following object is masked from 'package:prophet':

    prophet
library(ggplot2)        
library(lubridate)

Attaching package: 'lubridate'
The following object is masked from 'package:tsibble':

    interval
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
✔ broom        1.0.6     ✔ rsample      1.2.1
✔ dials        1.3.0     ✔ tibble       3.2.1
✔ infer        1.0.7     ✔ tidyr        1.3.1
✔ modeldata    1.4.0     ✔ tune         1.2.1
✔ parsnip      1.2.1     ✔ workflows    1.1.4
✔ purrr        1.0.2     ✔ workflowsets 1.1.0
✔ recipes      1.1.0     ✔ yardstick    1.3.1
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ purrr::%@%()          masks rlang::%@%()
✖ yardstick::accuracy() masks fabletools::accuracy()
✖ purrr::discard()      masks scales::discard()
✖ dplyr::filter()       masks stats::filter()
✖ purrr::flatten()      masks rlang::flatten()
✖ purrr::flatten_chr()  masks rlang::flatten_chr()
✖ purrr::flatten_dbl()  masks rlang::flatten_dbl()
✖ purrr::flatten_int()  masks rlang::flatten_int()
✖ purrr::flatten_lgl()  masks rlang::flatten_lgl()
✖ purrr::flatten_raw()  masks rlang::flatten_raw()
✖ infer::generate()     masks fabletools::generate()
✖ infer::hypothesize()  masks fabletools::hypothesize()
✖ purrr::invoke()       masks rlang::invoke()
✖ dplyr::lag()          masks stats::lag()
✖ parsnip::null_model() masks fabletools::null_model()
✖ rsample::populate()   masks Rcpp::populate()
✖ purrr::splice()       masks rlang::splice()
✖ recipes::step()       masks stats::step()
• Learn how to get started at https://www.tidymodels.org/start/
# Example: Cache la Poudre River at Mouth (USGS site 06752260)
poudre_flow <- readNWISdv(siteNumber = "06752260",    # Download data from USGS for site 06752260
                          parameterCd = "00060",      # Parameter code 00060 = discharge in cfs)
                          startDate = "2013-01-01",   # Set the start date
                          endDate = "2023-12-31") |>  # Set the end date
  renameNWISColumns() |>                              # Rename columns to standard names (e.g., "Flow", "Date")
  mutate(Date = as_date(Date),
         Date = floor_date(Date, "month")) |>                   # Convert daily Date values into a year-month format (e.g., "2023 Jan")
  group_by(Date) |>                                   # Group the data by the new monthly Date
  summarise(Flow = mean(Flow),
            .groups = "drop")                       # Calculate the average daily flow for each month
# Convert to tsibble
poudre_ts <- poudre_flow %>%
  as_tsibble(index = Date)

poudre_ts
# A tsibble: 132 x 2 [1D]
   Date          Flow
   <date>       <dbl>
 1 2013-01-01   18.1 
 2 2013-02-01   18.0 
 3 2013-03-01    8.21
 4 2013-04-01    5.94
 5 2013-05-01  333.  
 6 2013-06-01  300.  
 7 2013-07-01   75.6 
 8 2013-08-01   48.8 
 9 2013-09-01 1085.  
10 2013-10-01  146.  
# ℹ 122 more rows
# Plotting the time series
library(ggplot2)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
flowplot <- ggplot(poudre_ts, aes(x = Date, y = Flow)) +
  geom_line() +
  labs(title = "Monthly Mean Discharge\nCache la Poudre River (2013–2023)",
       x = "Year‑Month", y = "Discharge (cfs)")

print(flowplot)

ggplotly(flowplot)
# Assignment 22
# 1. Fit Prophet and Arima Models 
mods <- list(
  prophet_reg() %>%
    set_engine("prophet"),
  arima_reg() %>% 
    set_engine("auto_arima")
)

models <- map(mods, ~ fit(.x, Flow ~ Date, data = poudre_ts))
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
frequency = 12 observations per 1 year
models_tbl <- as_modeltime_table(models)
# 2. Forecast the next 12 months
future_tbl <- poudre_ts %>% future_frame(Date, .length_out = "12 months")

forecast_tbl <- models_tbl %>%
  modeltime_forecast(
    new_data    = future_tbl,
    actual_data = poudre_ts
  ) %>%
  filter(.key == "prediction") %>%
  select(.model_id, Date = .index, Predicted = .value)
Error: Column `Date` (index) must not contain `NA`.
Warning: Unknown or uninitialised column: `.key`.
# 3. Download daily streamflow for the next 12 months and aggregate this data to monthly averages
obs_2024 <- readNWISdv(
    siteNumber  = "06752260",
    parameterCd = "00060",
    startDate   = "2024-01-01",
    endDate     = "2024-12-31"
  ) %>%
  renameNWISColumns() %>%
  mutate(
    Date = as_date(Date),
    Date = floor_date(Date, "month")
  ) %>%
  group_by(Date) %>%
  summarise(Observed = mean(Flow, na.rm = TRUE), .groups = "drop")

# 4. Compute the R2 Values 
compare_tbl <- forecast_tbl %>%
  left_join(obs_2024, by = "Date")

r2_val <- summary(lm(Observed ~ Predicted, data = compare_tbl))$r.squared
cat("R² = ", round(r2_val, 3),
    " → ", round(r2_val * 100, 1),
    "% of observed monthly variance explained by the forecasts.\n", sep = "")
R² = 0.919 → 91.9% of observed monthly variance explained by the forecasts.

R-squared is the proportion of variance in the observed monthly flows that is explained by the forecasts. Values closer to 1 indicate better explanatory power. From this R2 value of 0.92, we can say that our forecasts explain 92% of the month-to-month variability in the observed 2024 flows, indicating the model captures the true seasonal and trend patterns very accurately.

# 5. Predicted vs Observed Values
ggplot(compare_tbl, aes(x = Predicted, y = Observed, color = factor(.model_id))) +
  geom_point(size = 2) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +  # 1:1 line
  geom_smooth(method = "lm", se = FALSE) +                      # fit line
  labs(
    title    = "Forecasted vs Observed Monthly Flow (2024)",
    subtitle = paste0("Models (IDs) & R² = ", round(r2_val, 3)),
    x        = "Forecasted Mean (cfs)",
    y        = "Observed Mean (cfs)",
    color    = "Model\nID"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'